** Using CPS data, create variables indicating whether household includes:
**		- any potential dependents: unmarried children under age 19, or unmarried
**		  full-time students under age 24, who live with a parent
**		- any parents / any mothers of those kids
**		- any parents of those parents (and hence grandparents of the children)
**********************************************************************************

** Load the raw CPS extract by running its accompanying do-file (output suppressed)
quietly do CPS/cps_00036.do

** Flag unmarried people under age 19 (hence age-eligible for EITC) and record
** whether each person's mother and/or father is listed in the household.

*** momloc / poploc: CPS pointers to the person's mother / father;
*** a value of 0 means no mom / no dad in the HH
*** marst 1 and 2 are the codes this script treats as "married"
generate Under19     = age < 19 & !inlist(marst, 1, 2)
generate HaveMominHH = momloc != 0
generate HaveDadinHH = poploc != 0

*** Combine the age flag with parent presence (mother, father, either)
generate Under19_LiveWithMom    = Under19 & HaveMominHH
generate Under19_LiveWithDad    = Under19 & HaveDadinHH
generate Under19_LiveWithParent = Under19 & (HaveMominHH | HaveDadinHH)

** Flag unmarried full-time students under age 24
*** schlcoll==1: high school full time; schlcoll==3: college/university full time
*** (the CPS universe for schlcoll is ages 16-24)
generate Under24FTS = age < 24 & inlist(schlcoll, 1, 3) & !inlist(marst, 1, 2)
generate Under24FTS_LiveWithMom    = Under24FTS & HaveMominHH
generate Under24FTS_LiveWithDad    = Under24FTS & HaveDadinHH
generate Under24FTS_LiveWithParent = Under24FTS_LiveWithMom | Under24FTS_LiveWithDad

** A "potential dependent" satisfies either the under-19 rule or the
** under-24 full-time-student rule, and lives with the relevant parent
generate PotentialDep             = Under19_LiveWithParent | Under24FTS_LiveWithParent
generate PotentialDep_LiveWithMom = Under19_LiveWithMom | Under24FTS_LiveWithMom
generate PotentialDep_LiveWithDad = Under19_LiveWithDad | Under24FTS_LiveWithDad


** Age of the youngest potential dependent in each household-month.
** Ages of non-dependents are blanked out first so they cannot win the minimum;
** households with no potential dependent end up with a missing value.
tempvar dep_age
generate `dep_age' = age if PotentialDep == 1
bysort serial year month: egen MinAge_PotentialDep = min(`dep_age')
drop `dep_age'


** Household-level counts of potential dependents: overall, living with their
** mother, and living with their father.
** (Counts are per household-month, but this shouldn't make any difference in
** ASEC data, because you don't have multiple months per person.)
foreach flag of varlist PotentialDep PotentialDep_LiveWithMom PotentialDep_LiveWithDad {
	bysort serial year month: egen N_`flag' = total(`flag')
}


** On each potential-dependent record that has a mother in the household,
** store the person number of the listed mother (missing on all other records)
generate pernum_mom = momloc if PotentialDep_LiveWithMom == 1


** Number of distinct mothers of potential dependents per household-month:
** tag one record per (household-month, mother) combination, then sum the tags
egen distinctmomtag = tag(serial year month pernum_mom)
bysort serial year month: egen N_DistinctMoms = total(distinctmomtag)


** At household-month level, record the person numbers of up to 4 distinct
** mothers. Each pass takes the smallest remaining mother person number and
** then blanks it out so the next pass finds the next-smallest.
** NOTE: this consumes pernum_mom -- after the loop it only retains values
** for mothers beyond the third one found.
forvalues slot = 1/4 {
	bysort serial year month (pernum_mom): egen temp_mom_loc`slot' = min(pernum_mom)
	if `slot' < 4 {
		replace pernum_mom = . if pernum_mom == temp_mom_loc`slot'
	}
}


** Individual-level indicator: this person is one of the (up to 4) recorded
** mothers of a potential dependent in their household
generate PotentialMidGenMom = inlist(pernum, temp_mom_loc1, temp_mom_loc2, temp_mom_loc3, temp_mom_loc4)


** Same procedure for fathers: on each potential-dependent record that has a
** father in the household, store the listed father's person number
generate pernum_dad = poploc if PotentialDep_LiveWithDad == 1

** Number of distinct fathers of potential dependents per household-month
egen distinctdadtag = tag(serial year month pernum_dad)
bysort serial year month: egen N_DistinctDads = total(distinctdadtag)

** Distinct parents = distinct mothers + distinct fathers
generate N_DistinctParents = N_DistinctMoms + N_DistinctDads

** At household-month level, record the person numbers of up to 4 distinct
** fathers, peeling off the smallest remaining person number on each pass.
** NOTE: this consumes pernum_dad in the same way pernum_mom is consumed.
forvalues slot = 1/4 {
	bysort serial year month (pernum_dad): egen temp_pop_loc`slot' = min(pernum_dad)
	if `slot' < 4 {
		replace pernum_dad = . if pernum_dad == temp_pop_loc`slot'
	}
}


** Individual-level indicator: this person is one of the (up to 4) recorded
** fathers of a potential dependent in their household
generate PotentialMidGenDad = inlist(pernum, temp_pop_loc1, temp_pop_loc2, temp_pop_loc3, temp_pop_loc4)

** Middle-generation parent = recorded mother or recorded father
generate PotentialMidGenParent = PotentialMidGenMom | PotentialMidGenDad


** Remove the household-level mother/father slot variables; they were only
** needed to build the PotentialMidGen* indicators
foreach who in mom pop {
	forvalues slot = 1/4 {
		drop temp_`who'_loc`slot'
	}
}


** Flag middle-generation parents whose own mother is in the household
generate ParentHasMom = PotentialMidGenParent & momloc != 0


** Household-level count of middle-generation parents living with their own mother
bysort serial year month: egen N_Parent_LiveWithMom = total(ParentHasMom)


** On those middle-generation records, store the person number of the listed
** mother (i.e. a grandmother of the potential dependents)
generate pernum_gmom = momloc if ParentHasMom == 1


** Number of distinct mothers-of-parents per household-month
egen distinctgmomtag = tag(serial year month pernum_gmom)
bysort serial year month: egen N_DistinctGMom = total(distinctgmomtag)



** At household-month level, record person numbers of up to 2 mothers-of-parents
** (MAY COME BACK AND EXPAND). Smallest person number first; it is blanked out
** of pernum_gmom so the second pass picks up the next one.
forvalues slot = 1/2 {
	bysort serial year month (pernum_gmom): egen temp_gmom_loc`slot' = min(pernum_gmom)
	if `slot' < 2 {
		replace pernum_gmom = . if pernum_gmom == temp_gmom_loc`slot'
	}
}


** Flag middle-generation parents whose own father is in the household
generate ParentHasDad = PotentialMidGenParent & poploc != 0

** ... and those with either of their own parents in the household
generate ParentHasParent = ParentHasMom | ParentHasDad



** Household-level count of middle-generation parents living with their own father
bysort serial year month: egen N_Parent_LiveWithDad = total(ParentHasDad)

** On those middle-generation records, store the person number of the listed
** father (i.e. a grandfather of the potential dependents)
generate pernum_gpop = poploc if ParentHasDad == 1




** Number of distinct fathers-of-parents per household-month, then the total
** number of distinct parents-of-parents
egen distinctgdadtag = tag(serial year month pernum_gpop)
bysort serial year month: egen N_DistinctGDad = total(distinctgdadtag)

generate N_DistinctGP = N_DistinctGMom + N_DistinctGDad


** At household-month level, record person numbers of up to 2 fathers-of-parents
** (MAY COME BACK AND EXPAND). Smallest person number first; it is blanked out
** of pernum_gpop so the second pass picks up the next one.
forvalues slot = 1/2 {
	bysort serial year month (pernum_gpop): egen temp_gpop_loc`slot' = min(pernum_gpop)
	if `slot' < 2 {
		replace pernum_gpop = . if pernum_gpop == temp_gpop_loc`slot'
	}
}


** Individual-level indicator for being a recorded mom or dad of a
** middle-generation parent (i.e. a grandparent of the potential dependents)
generate PotentialOldGenParent = inlist(pernum, temp_gmom_loc1, temp_gmom_loc2, temp_gpop_loc1, temp_gpop_loc2)


** Also count spouses of these grandparents as old-generation parents.
** For each of the four grandparent slots:
**   temp_sploc_<slot>    = sploc copied from the grandparent's own record
**   pernum_spouse_<slot> = that value spread to every record in the household-month
**   SpouseOf<SLOT>       = 1 on the record whose pernum equals it
*** NOTE(review): the previous comment here mapped older names
*** (temp_epn_spouse_gm1 -> temp_sploc_gm1, epn_spouse_gm1 -> sploc_gm1);
*** the second variable is actually created as pernum_spouse_gm1.
local slots   gm1 gm2 gd1 gd2
local labels  GM1 GM2 GD1 GD2
local sources temp_gmom_loc1 temp_gmom_loc2 temp_gpop_loc1 temp_gpop_loc2

forvalues i = 1/4 {
	local slot   : word `i' of `slots'
	local label  : word `i' of `labels'
	local source : word `i' of `sources'

	generate temp_sploc_`slot' = sploc if pernum == `source'
	bysort serial year month: egen pernum_spouse_`slot' = min(temp_sploc_`slot')
	generate SpouseOf`label' = pernum == pernum_spouse_`slot'
	replace PotentialOldGenParent = 1 if SpouseOf`label' == 1
}


*** Old-generation mom: occupies one of the two grandmother slots and has sex==2
*** (spouses added to PotentialOldGenParent via SpouseOf* are not included here)
generate PotentialOldGenMom = inlist(pernum, temp_gmom_loc1, temp_gmom_loc2) & sex == 2


** Demographic 0/1 indicators built from the coded CPS variables

*** Labor force status
generate Employed = inlist(empstat, 10, 12)

*** Education (educ99 codes): exactly 10, 11 through 14, 15 through 18
generate HSGrad      = educ99 == 10
generate SomeCollege = inlist(educ99, 11, 12, 13, 14)
generate CollegeGrad = inlist(educ99, 15, 16, 17, 18)

*** Sex, race, marital status
generate Female  = sex == 2

generate White   = race == 100

generate Married = inlist(marst, 1, 2)

*** Full-time student (high school or college), same schlcoll codes as Under24FTS
generate FTStudent = inlist(schlcoll, 1, 3)

** Deal with codes for missing values.
** This must run BEFORE any indicator is derived from these income variables:
** the sentinel codes are large positive numbers, so a test such as incss>0
** would otherwise treat a sentinel-coded record as having positive income.
*** NOTE(review): only the all-9s codes are recoded here; if the extract also
*** uses other special codes for these variables, add them -- confirm against
*** the codebook.
replace incwage = . if incwage==99999999
replace ftotval = . if ftotval==999999999
replace inctot = . if inctot==999999999
replace hhincome = . if hhincome==99999999
replace incss = . if incss==999999

** essself = 1 if the person reports positive Social Security income (incss),
** 0 otherwise (including records where incss is missing after the recode above)
gen essself = (incss>0 & incss~=.)

** Save the full person-level file; the extracts below are cut from it
save TempData, replace 


*******************************************************************************
** Mid-generation extract: keep people who are parents of a potential dependent
** and who are NOT ALSO flagged as potential old-generation parents
drop if PotentialMidGenParent != 1 | PotentialOldGenParent == 1

** Variables carried into the extract (identifiers, generation flags,
** demographics, household structure, income, weight)
keep serial year month pernum                                     ///
	PotentialMidGenParent PotentialMidGenMom                      ///
	age Employed HSGrad SomeCollege CollegeGrad Female White Married ///
	ParentHasParent N_DistinctParents N_DistinctGP MinAge_PotentialDep ///
	FTStudent relate statefip nchild health                       ///
	incwage ftotval hhincome asecwt PotentialDep

** Remove records that are identical on every kept variable
duplicates drop 

save CPS/MidGenParentData, replace
clear 


*******************************************************************************
** Old-generation extract: reload the full person file and keep everyone
** flagged as a potential old-generation parent.
** (A stricter alternative, currently disabled, would also require
**  PotentialMidGenParent~=1.)
use TempData
drop if PotentialOldGenParent != 1

** Variables carried into the extract (identifiers, generation flags,
** demographics, household structure, income, weight)
keep serial year month pernum                                     ///
	PotentialOldGenParent PotentialOldGenMom PotentialMidGenParent ///
	age Employed HSGrad SomeCollege CollegeGrad Female White Married ///
	N_DistinctGP N_PotentialDep MinAge_PotentialDep               ///
	relate statefip nchild health                                 ///
	incwage ftotval hhincome incss essself asecwt

** Remove records that are identical on every kept variable
duplicates drop 

save CPS/OldGenParentData, replace 
clear 